#%%
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import numpy as np
import time
# import tensorflow as tf

import gym
# Random-action baseline: run CartPole-v0 for a fixed number of episodes,
# choosing every action at random, and report the mean total reward per
# episode together with the wall-clock time taken.
env = gym.make('CartPole-v0')

env.reset()           # initialise the gym environment

episode = 0           # number of episodes completed so far
reward_sum = 0        # reward accumulated within the current episode

episodes = 10000      # total number of episodes to run
reward_sum_list = []  # total reward of each finished episode
start_time = time.time()
try:
    while episode < episodes:
        # env.render()  # uncomment to draw one animation frame per step

        # Pick action 0 or 1 uniformly at random and apply it; the step
        # result is unpacked as (observation, reward, done, info).
        # NOTE(review): newer gym/gymnasium releases return five values from
        # step() -- confirm against the installed version.
        observation, reward, done, _ = env.step(np.random.randint(0, 2))

        # Accumulate the reward obtained within the current episode.
        reward_sum += reward

        if done:  # the current episode has terminated
            episode += 1  # count the finished episode
            reward_sum_list.append(reward_sum)
            print("Reward for episode %s was:"%episode, reward_sum)

            reward_sum = 0  # clear the per-episode reward total
            env.reset()     # re-initialise the environment for the next episode
    # Stop the clock before teardown so the timing covers only the rollouts.
    end_time = time.time()
finally:
    # Release the environment even if the loop raised.
    env.close()
print( "随机选择动作的情况下每个episode的reward总和均值为： ", np.mean(reward_sum_list) )
print("总用时为 ", (end_time-start_time), " 秒")